In [118]:
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import AgglomerativeClustering
from kmodes.kprototypes import KPrototypes
from sklearn.preprocessing import LabelEncoder
In [119]:
INPUT_PATH = Path("datasets/bfar.csv")
DF = pd.read_csv(INPUT_PATH)
DF.head()
Out[119]:
RESPONSE A1:AREA A2:GROUP B3:AGE B5:SEX B6:M-STATUS B7:EDUCATION B8:HH_SIZE C1:TOT_INCOME/A C2:INCOME/B/FISH ... J6:AVE_FBP-PERC J7.1 J7.2 J7.3 J7.4 J7.5 J7:AVE_FBP-CONT K:COMMENTS CD: P_SCORE CV: PS_WT
0 1 ABULUG 1 33 1 2 3 2 1 1 ... 4.67 3.0 3.0 5.0 5.0 5.0 4.2 NaN NaN NaN
1 2 ABULUG 1 57 1 3 3 2 1 1 ... 5.00 5.0 5.0 5.0 5.0 5.0 5.0 NaN NaN NaN
2 3 ABULUG 1 45 1 2 3 2 1 1 ... 4.83 5.0 5.0 5.0 5.0 5.0 5.0 NaN NaN NaN
3 4 ABULUG 1 32 1 1 3 3 1 1 ... 4.33 5.0 5.0 5.0 5.0 5.0 5.0 NaN NaN NaN
4 5 ABULUG 1 38 1 1 3 1 1 1 ... 5.00 1.0 5.0 5.0 5.0 5.0 4.2 NaN NaN NaN

5 rows × 215 columns

In [120]:
print("Data Frame Shape", DF.shape)
Data Frame Shape (1339, 215)
In [121]:
# Check if there is missing values
MISSING_DATA = DF.isnull().sum()
MISSING_DATA[MISSING_DATA > 0]
Out[121]:
Y_BOAT-RE           740
NY_W/BOAT           740
BOAT_COND           740
J1:BOAT_AGREE       740
J2:BOAT_TYPE        740
J3:BOAT_DESIGN      740
J4:BOAT_COND        740
J4: REASON-NO       740
J5.1                740
J5.2                740
J5.3                740
J5.4                740
J5.5                740
J5.6                740
J5.7                740
J6:AVE_FBP-IMPT     740
J6.1                740
J6.2                740
J6.3                740
J6.4                740
J6.5                740
J6.6                740
J6:AVE_FBP-PERC     740
J7.1                740
J7.2                740
J7.3                740
J7.4                740
J7.5                740
J7:AVE_FBP-CONT     740
K:COMMENTS         1339
CD: P_SCORE        1339
CV: PS_WT          1339
dtype: int64
In [122]:
sns.set(style="whitegrid")

# Before
ROWS_BEFORE = len(DF)
print(f"Rows before dropping duplicates: {ROWS_BEFORE}")

# After
DF = DF.drop_duplicates()
ROWS_AFTER = len(DF)
print(f"Remaining rows after dropping duplicates: {ROWS_AFTER}")
Rows before dropping duplicates: 1339
Remaining rows after dropping duplicates: 1339
In [123]:
DATA = {
    'Stage': ['Before Dropping Duplicates', 'After Dropping Duplicates'],
    'Row Count': [ROWS_BEFORE, ROWS_AFTER]
}
PLOT_DF = pd.DataFrame(DATA)

plt.figure(figsize=(8, 6))
sns.barplot(x='Stage', y='Row Count', hue='Stage', data=PLOT_DF, palette=['lightblue', 'lightblue'], dodge=False)
plt.title('Effect of Dropping Duplicate Rows', fontsize=14, pad=10)
plt.xlabel('Stage', fontsize=12)
plt.ylabel('Number of Rows', fontsize=12)
plt.legend([],[], frameon=False)
plt.tight_layout()

plt.show()
No description has been provided for this image
In [124]:
print(f"Total columns with missing values: {len(MISSING_DATA)}")
Total columns with missing values: 215
In [125]:
MISSING_PERCENT = DF.isnull().mean().sort_values(ascending=False)  
MISSING_COUNT = DF.isnull().sum()[MISSING_PERCENT.index]  # Get counts in same order

ax = MISSING_PERCENT[MISSING_PERCENT > 0].plot(
    kind='barh', 
    figsize=(10, 8),
    color='lightblue',
    edgecolor='black', 
    title='Missing Values per Column (Descending)',
    width=0.8
)

for index, (col, percent) in enumerate(MISSING_PERCENT[MISSING_PERCENT > 0].items()):
    count = MISSING_COUNT[col]
    label = f"{percent:.1%} ({count:,})"  
    ax.text(
        percent + 0.01,  
        index,         
        label, 
        va='center',   
        fontsize=9
    )

plt.xlabel('Missing Percentage', fontsize=12)
plt.ylabel('Columns', fontsize=12)
plt.xlim(0, 1.1) 
plt.grid(axis='x', linestyle='--', alpha=0.4)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [126]:
# Fix missing values
NUM_COLS = DF.select_dtypes(include='number').columns

for COL in NUM_COLS:
    MEDIAN_VALUE = DF[COL].median()
    DF[COL] = DF[COL].fillna(MEDIAN_VALUE)
c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\lib\nanfunctions.py:1215: RuntimeWarning: Mean of empty slice
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\lib\nanfunctions.py:1215: RuntimeWarning: Mean of empty slice
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
c:\Users\User\AppData\Local\Programs\Python\Python311\Lib\site-packages\numpy\lib\nanfunctions.py:1215: RuntimeWarning: Mean of empty slice
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
In [127]:
# Check if there is still missing values
MISSING_DATA = DF.isnull().sum()
MISSING_DATA[MISSING_DATA > 0]
Out[127]:
K:COMMENTS     1339
CD: P_SCORE    1339
CV: PS_WT      1339
dtype: int64
In [128]:
MISSING_PERCENT = DF.isnull().mean().sort_values(ascending=False)  
MISSING_COUNT = DF.isnull().sum()[MISSING_PERCENT.index]  

ax = MISSING_PERCENT[MISSING_PERCENT > 0].plot(
    kind='barh', 
    figsize=(10, 8),
    color='lightblue',
    edgecolor='black', 
    title='Missing Values per Column (Descending)',
    width=0.8
)

for index, (col, percent) in enumerate(MISSING_PERCENT[MISSING_PERCENT > 0].items()):
    count = MISSING_COUNT[col]
    label = f"{percent:.1%} ({count:,})"  
    ax.text(
        percent + 0.01,  
        index,         
        label, 
        va='center',   
        fontsize=9
    )

plt.xlabel('Missing Percentage', fontsize=12)
plt.ylabel('Columns', fontsize=12)
plt.xlim(0, 1.1) 
plt.grid(axis='x', linestyle='--', alpha=0.4)
plt.tight_layout()
plt.show()
No description has been provided for this image
In [129]:
# Drop columns with 50% or more missing values
THRESHOLD_COL = len(DF) * 0.5
DF = DF.dropna(axis=1, thresh=THRESHOLD_COL)
In [130]:
# Check if column with 50% or more missing values already dropped
DF
Out[130]:
RESPONSE A1:AREA A2:GROUP B3:AGE B5:SEX B6:M-STATUS B7:EDUCATION B8:HH_SIZE C1:TOT_INCOME/A C2:INCOME/B/FISH ... J6.4 J6.5 J6.6 J6:AVE_FBP-PERC J7.1 J7.2 J7.3 J7.4 J7.5 J7:AVE_FBP-CONT
0 1 ABULUG 1 33 1 2 3 2 1 1 ... 5.0 5.0 5.0 4.67 3.0 3.0 5.0 5.0 5.0 4.2
1 2 ABULUG 1 57 1 3 3 2 1 1 ... 5.0 5.0 5.0 5.00 5.0 5.0 5.0 5.0 5.0 5.0
2 3 ABULUG 1 45 1 2 3 2 1 1 ... 4.0 5.0 5.0 4.83 5.0 5.0 5.0 5.0 5.0 5.0
3 4 ABULUG 1 32 1 1 3 3 1 1 ... 5.0 5.0 5.0 4.33 5.0 5.0 5.0 5.0 5.0 5.0
4 5 ABULUG 1 38 1 1 3 1 1 1 ... 5.0 5.0 5.0 5.00 1.0 5.0 5.0 5.0 5.0 4.2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1334 1335 SARANGANI 0 52 1 2 2 1 1 1 ... 5.0 5.0 5.0 4.67 5.0 5.0 5.0 5.0 5.0 4.2
1335 1336 SARANGANI 0 63 1 2 2 2 1 1 ... 5.0 5.0 5.0 4.67 5.0 5.0 5.0 5.0 5.0 4.2
1336 1337 SARANGANI 0 55 1 2 2 2 1 1 ... 5.0 5.0 5.0 4.67 5.0 5.0 5.0 5.0 5.0 4.2
1337 1338 SARANGANI 0 43 1 2 2 1 2 2 ... 5.0 5.0 5.0 4.67 5.0 5.0 5.0 5.0 5.0 4.2
1338 1339 SARANGANI 0 42 1 2 3 1 1 1 ... 5.0 5.0 5.0 4.67 5.0 5.0 5.0 5.0 5.0 4.2

1339 rows × 212 columns

In [131]:
# Convert all strings to lowercase
TEXT_COLS = DF.select_dtypes(include='object').columns

for COL in TEXT_COLS:
    DF[COL] = DF[COL].astype(str).str.lower().str.strip()
In [132]:
# check if all strings converted to lowercase
DF
Out[132]:
RESPONSE A1:AREA A2:GROUP B3:AGE B5:SEX B6:M-STATUS B7:EDUCATION B8:HH_SIZE C1:TOT_INCOME/A C2:INCOME/B/FISH ... J6.4 J6.5 J6.6 J6:AVE_FBP-PERC J7.1 J7.2 J7.3 J7.4 J7.5 J7:AVE_FBP-CONT
0 1 abulug 1 33 1 2 3 2 1 1 ... 5.0 5.0 5.0 4.67 3.0 3.0 5.0 5.0 5.0 4.2
1 2 abulug 1 57 1 3 3 2 1 1 ... 5.0 5.0 5.0 5.00 5.0 5.0 5.0 5.0 5.0 5.0
2 3 abulug 1 45 1 2 3 2 1 1 ... 4.0 5.0 5.0 4.83 5.0 5.0 5.0 5.0 5.0 5.0
3 4 abulug 1 32 1 1 3 3 1 1 ... 5.0 5.0 5.0 4.33 5.0 5.0 5.0 5.0 5.0 5.0
4 5 abulug 1 38 1 1 3 1 1 1 ... 5.0 5.0 5.0 5.00 1.0 5.0 5.0 5.0 5.0 4.2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1334 1335 sarangani 0 52 1 2 2 1 1 1 ... 5.0 5.0 5.0 4.67 5.0 5.0 5.0 5.0 5.0 4.2
1335 1336 sarangani 0 63 1 2 2 2 1 1 ... 5.0 5.0 5.0 4.67 5.0 5.0 5.0 5.0 5.0 4.2
1336 1337 sarangani 0 55 1 2 2 2 1 1 ... 5.0 5.0 5.0 4.67 5.0 5.0 5.0 5.0 5.0 4.2
1337 1338 sarangani 0 43 1 2 2 1 2 2 ... 5.0 5.0 5.0 4.67 5.0 5.0 5.0 5.0 5.0 4.2
1338 1339 sarangani 0 42 1 2 3 1 1 1 ... 5.0 5.0 5.0 4.67 5.0 5.0 5.0 5.0 5.0 4.2

1339 rows × 212 columns

In [133]:
# DIMENSIONALITY REDUCTION

# In this process i will train 3 models
# - PCA
# - Entropy Weighted K-Means
# - Auto Encoder

# and choose the best performing model for identifyin feature weights with the metrics of
# - Silhouette Score
# - Calinski-Harabasz Index
# - Davies-Bouldin Score
In [134]:
# Model configuration
K = 3
DF_NUM = DF.select_dtypes(include='number').copy()
SCALER = MinMaxScaler()
X_SCALED = SCALER.fit_transform(DF_NUM)
In [135]:
# Create a list to hold all metrics
METRICS_SUMMARY = []

def evaluate_clustering(X, LABELS, NAME):
    SILHOUETTE = silhouette_score(X, LABELS)
    CALINSKI = calinski_harabasz_score(X, LABELS)
    DAVIES = davies_bouldin_score(X, LABELS)
    
    METRICS_SUMMARY.append({
        'Model': NAME,
        'Silhouette': SILHOUETTE,
        'Calinski-Harabasz': CALINSKI,
        'Davies-Bouldin': DAVIES
    })
    
    RESULTS_DF = pd.DataFrame([{
        'Model': NAME,
        'Silhouette': f"{SILHOUETTE:.4f}",
        'Calinski-Harabasz': f"{CALINSKI:.4f}",
        'Davies-Bouldin': f"{DAVIES:.4f}"
    }]).set_index('Model')
    
    print(f"\n{NAME} Clustering Evaluation:")
    display(RESULTS_DF.style
           .set_properties(**{'text-align': 'center'})
           .format(precision=4))
In [136]:
# PCA
PCA_MODEL = PCA(n_components=2)
X_PCA = PCA_MODEL.fit_transform(X_SCALED)

KMEANS_PCA = KMeans(n_clusters=K, random_state=42, n_init=10)
LABELS_PCA = KMEANS_PCA.fit_predict(X_PCA)

evaluate_clustering(X_PCA, LABELS_PCA, "PCA")
PCA Clustering Evaluation:
  Silhouette Calinski-Harabasz Davies-Bouldin
Model      
PCA 0.3980 1362.5074 0.8724
In [137]:
# Autoencoder
INPUT_DIM = X_SCALED.shape[1]

INPUT_LAYER = Input(shape=(INPUT_DIM,))
ENCODED = Dense(16, activation='relu')(INPUT_LAYER)
BOTTLENECK = Dense(2, activation='linear')(ENCODED)
DECODED = Dense(16, activation='relu')(BOTTLENECK)
OUTPUT_LAYER = Dense(INPUT_DIM, activation='sigmoid')(DECODED)

AUTOENCODER = Model(INPUT_LAYER, OUTPUT_LAYER)
AUTOENCODER.compile(optimizer=Adam(learning_rate=0.01), loss='mse')

AUTOENCODER.fit(X_SCALED, X_SCALED, epochs=100, batch_size=32, verbose=0,
                callbacks=[EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)])

ENCODER = Model(inputs=INPUT_LAYER, outputs=BOTTLENECK)
X_AE = ENCODER.predict(X_SCALED)

KMEANS_AE = KMeans(n_clusters=K, random_state=42, n_init=10)
LABELS_AE = KMEANS_AE.fit_predict(X_AE)

evaluate_clustering(X_AE, LABELS_AE, "Autoencoder")
42/42 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step

Autoencoder Clustering Evaluation:
  Silhouette Calinski-Harabasz Davies-Bouldin
Model      
Autoencoder 0.4738 1577.7962 0.8216
In [138]:
# Entropy Weighted K-Means
def entropy_weights(X):
    X_NORM = X / (X.sum(axis=0) + 1e-9)
    X_NORM = np.where(X_NORM == 0, 1e-9, X_NORM)
    ENTROPY = -np.sum(X_NORM * np.log(X_NORM), axis=0) / np.log(len(X))
    D = 1 - ENTROPY
    WEIGHTS = D / D.sum()
    return WEIGHTS

WEIGHTS = entropy_weights(X_SCALED)
X_ENTROPY = X_SCALED * WEIGHTS

KMEANS_ENTROPY = KMeans(n_clusters=K, random_state=42, n_init=10)
LABELS_ENTROPY = KMEANS_ENTROPY.fit_predict(X_ENTROPY)

evaluate_clustering(X_ENTROPY, LABELS_ENTROPY, "Entropy-Weighted")
Entropy-Weighted Clustering Evaluation:
  Silhouette Calinski-Harabasz Davies-Bouldin
Model      
Entropy-Weighted 0.1686 86.6478 3.0035
In [139]:
METRICS_DF = pd.DataFrame(METRICS_SUMMARY)
SCORING_DF = METRICS_DF.copy()

# Invert Davies-Bouldin because lower is better
SCORING_DF['Inverse Davies-Bouldin'] = 1 / (SCORING_DF['Davies-Bouldin'] + 1e-6)

# Scale all metrics between 0 and 1
SCALER = MinMaxScaler()
SCALED_METRICS = SCALER.fit_transform(SCORING_DF[['Silhouette', 'Calinski-Harabasz', 'Inverse Davies-Bouldin']])

# Add composite score (mean of scaled metrics)
SCORING_DF['Composite Score'] = SCALED_METRICS.mean(axis=1)

# Find the best model
BEST_MODEL = SCORING_DF.loc[SCORING_DF['Composite Score'].idxmax()]

# Show metrics summary
print("\nOverall Clustering Metrics Summary:")
display(METRICS_DF)
Overall Clustering Metrics Summary:
Model Silhouette Calinski-Harabasz Davies-Bouldin
0 PCA 0.398025 1362.507416 0.872423
1 Autoencoder 0.473812 1577.796218 0.821649
2 Entropy-Weighted 0.168575 86.647821 3.003505
In [140]:
print("\nBest Performing Model")
display(pd.DataFrame({
    '': ['Model', 'Composite Score'],
    'Best Performance': [BEST_MODEL['Model'], f"{BEST_MODEL['Composite Score']:.4f}"]
}).style.hide(axis='index'))
Best Performing Model
Best Performance
Model Autoencoder
Composite Score 1.0000
In [141]:
# Plot scattered distribution in each model to check if the best performing model is reasonable
plt.figure(figsize=(15, 4))

plt.subplot(1, 3, 1)
sns.scatterplot(x=X_PCA[:, 0], y=X_PCA[:, 1], hue=LABELS_PCA, palette="tab10")
plt.title("PCA + KMeans")

plt.subplot(1, 3, 2)
sns.scatterplot(x=X_AE[:, 0], y=X_AE[:, 1], hue=LABELS_AE, palette="tab10")
plt.title("Autoencoder + KMeans")

plt.subplot(1, 3, 3)
sns.scatterplot(x=X_ENTROPY[:, 0], y=X_ENTROPY[:, 1], hue=LABELS_ENTROPY, palette="tab10")
plt.title("Entropy Weighted + KMeans")

plt.tight_layout()
plt.show()
No description has been provided for this image
In [142]:
print("\nExtracting Feature Weights from Best Model...\n")

if BEST_MODEL['Model'] == "PCA":
    PCA_WEIGHTS = np.abs(PCA_MODEL.components_[:2]).mean(axis=0)
    FEATURE_WEIGHTS = pd.Series(PCA_WEIGHTS, index=DF_NUM.columns).sort_values(ascending=False)

elif BEST_MODEL['Model'] == "Autoencoder":
    ENCODER_WEIGHTS = np.abs(AUTOENCODER.layers[1].get_weights()[0])  
    SUMMED_WEIGHTS = ENCODER_WEIGHTS.sum(axis=1)
    FEATURE_WEIGHTS = pd.Series(SUMMED_WEIGHTS, index=DF_NUM.columns).sort_values(ascending=False)

elif BEST_MODEL['Model'] == "Entropy-Weighted":
    FEATURE_WEIGHTS = pd.Series(WEIGHTS, index=DF_NUM.columns).sort_values(ascending=False)
Extracting Feature Weights from Best Model...

In [143]:
# Show top features
print("📌 Top Contributing Features:")
display(FEATURE_WEIGHTS.head(10))
📌 Top Contributing Features:
D2/A:Ind_APP     11.972435
D1/A:Ind_VEH     11.090771
D1/B:Ind_VEH     10.557349
D2.7-A_QTY        8.336466
D2/B:Ind_APP      8.041806
D2.7:A_STOVE      8.025345
H:Ind_ASWS        7.619016
D3.4-A_QTY        7.268851
D3.4:A_OTHERS     7.225698
D3.1:B_CP         7.176279
dtype: float32
In [144]:
plt.figure(figsize=(10, 5))
FEATURE_WEIGHTS.head(10).plot(kind='barh')
plt.gca().invert_yaxis()
plt.title(f"Top 10 Feature Weights - {BEST_MODEL['Model']}")
plt.xlabel("Weight")
plt.tight_layout()
plt.show()
No description has been provided for this image
In [145]:
print("All Feature Weights:")
display(FEATURE_WEIGHTS.to_frame(name="Weight").style
       .background_gradient(cmap='Blues', subset=['Weight'])
       .format({'Weight': '{:.4f}'})
       .set_properties(**{'text-align': 'center'})
       .set_caption(f"Feature Importance ({BEST_MODEL['Model']})"))
All Feature Weights:
Feature Importance (Autoencoder)
  Weight
D2/A:Ind_APP 11.9724
D1/A:Ind_VEH 11.0908
D1/B:Ind_VEH 10.5573
D2.7-A_QTY 8.3365
D2/B:Ind_APP 8.0418
D2.7:A_STOVE 8.0253
H:Ind_ASWS 7.6190
D3.4-A_QTY 7.2689
D3.4:A_OTHERS 7.2257
D3.1:B_CP 7.1763
D2.9-A_QTY 7.0621
D2.10:A_OTHERS 7.0049
D2.9:A_FURNITURE 6.8864
D2.3-B_QTY 6.8597
C1:TOT_INCOME/A 6.6398
C2:INCOME/B/FISH 6.4967
J4: REASON-NO 6.4826
D3.1-B_QTY 6.3860
J4:BOAT_COND 6.3384
D2.10-A_QTY 6.2314
C4:INCOME/B/ALT 6.2273
D2.3:B_WASH-M 6.1217
D2.3:A_WASH-M 5.8258
D3.3-A_QTY 5.7777
D2.6:A_FRIDGE 5.7259
D3/B:YC_GAD 5.6129
D1.7:A_OTHERS 5.6088
NY_W/BOAT 5.5187
G2:B_GSIS 5.5068
D2.1-B_QTY 5.4941
D1.3:A_TRICYCLE 5.4154
D2.6-A_QTY 5.4007
D1.3-A_QTY 5.3903
D3/B:AC_GAD 5.3257
A2:GROUP 5.3242
D2.3-A_QTY 5.3200
D1.4:A_CAR 5.3064
D3.3:A_COMPUTER 5.2791
G2:A_GSIS 5.1508
D1.3-B_QTY 5.1173
D1.5:B_JEEP 5.0887
D3.2:B_LANDLINE 5.0857
J2:BOAT_TYPE 5.0698
D1.4-A_QTY 5.0177
D2.1:B_TV 4.9475
D3.2:A_LANDLINE 4.9201
D1.7-A_QTY 4.8969
D2.7:B_STOVE 4.8598
G4:B_PN-IN 4.8166
H2:RET_P 4.8120
D2.7-B_QTY 4.7763
E5:B_NET-SUBS 4.7589
G4:A_PN-IN 4.7491
D1.6-B_QTY 4.7351
D3/A:IndGAD 4.7251
D1.2-A_QTY 4.6605
D1.2:A_MOTORC 4.6564
D1.3:B_TRICYCLE 4.6467
D1.5-B_QTY 4.6424
I4:TFA 4.5250
D1.5:A_JEEP 4.4779
H3:SPES 4.4556
G5:B_LIFE-IN 4.4346
H7:AS_P 4.3253
D1.6:B_TRUCK 4.2873
H5:TBE 4.2441
D3.2-A_QTY 4.2151
D1.5-A_QTY 4.1950
H6:F_PC 4.1888
D2.4-A_QTY 4.1476
D2.10-B_QTY 4.1426
D2.2-A_QTY 4.1202
I6.2:FT 4.1182
J3:BOAT_DESIGN 4.0760
G6:A_HEALTH-IN 4.0727
D3.1:A_CP 4.0673
H8:E/CW_P 4.0638
D2.10:B_OTHERS 4.0612
I5:TFV 4.0178
B5:SEX 4.0003
I8.4:FISH_COMP 3.9715
I8.3:BOAT_P 3.9552
D3.3-B_QTY 3.9343
D2.9:B_FURNITURE 3.9335
D1.4-B_QTY 3.9324
D2/A:YC_APP 3.8778
D1.6:A_TRUCK 3.8764
D2.4:A_AC 3.8565
D3.1-A_QTY 3.8130
D3/B:IndGAD 3.8128
D1.1-A_QTY 3.8087
E4:A_COOK-FUEL 3.7855
D1/A:AC_VEH 3.7781
I7.2:LIFE_B 3.7562
B8:HH_SIZE 3.7556
D1.2-B_QTY 3.7203
D1.1:A_BIKE 3.7079
I2:A/C_M 3.6716
D2.9-B_QTY 3.6578
D1.6-A_QTY 3.6078
D3/A:AC_GAD 3.5855
D2.5-B_QTY 3.5096
G5:A_LIFE-IN 3.5005
D3.3:B_COMPUTER 3.4910
J7.2 3.4822
D1/B:AC_VEH 3.4589
G6:B_HEALTH-IN 3.4456
D2.6-B_QTY 3.4389
E5:A_NET-SUBS 3.4294
D2.4-B_QTY 3.4258
J5.3 3.3975
D3.4:B_OTHERS 3.3949
D2.8:A_E-HEATER 3.3770
D2.1-A_QTY 3.3633
H4:AL_P 3.3624
C5:TOT_INCOME/B 3.3542
D1.2:B_MOTORC 3.3385
D3.2-B_QTY 3.3171
I8.5:OTHERS 3.2762
D2.2-B_QTY 3.2625
BOAT_COND 3.2572
H1:4Ps 3.2554
D2/B:YC_APP 3.2539
B7:EDUCATION 3.2443
D2.4:B_AC 3.2435
D3/A:YC_GAD 3.2380
D1.7:B_OTHERS 3.2241
J7.4 3.1988
I3:NOP_H 3.1923
D2.8:B_E-HEATER 3.1893
RESPONSE 3.1747
D1.4:B_CAR 3.1730
D1.7-B_QTY 3.1718
F1:A_HOUSE-OWN 3.1361
Y_BOAT-RE 3.1098
I6.5:OTHERS 3.1031
D2.1:A_TV 3.0895
D3.4-B_QTY 3.0713
E4:B_COOK-FUEL 3.0459
J7.1 3.0371
D2.6:B_FRIDGE 3.0246
D2.8-A_QTY 3.0146
G3:A_PhilHealth 3.0024
G/A:Ind_INSU 2.9828
J5.5 2.9494
J7.5 2.9400
D2.8-B_QTY 2.9323
F3:A_HOUSE-BUILT 2.8753
D2.2:B_DVD 2.8692
D1/A:YC_VEH 2.8685
J6.6 2.8683
F3:B_HOUSE-BUILT 2.8509
D2.2:A_DVD 2.8442
F2:A_HOUSE-ACQ 2.8280
F2:B_HOUSE-ACQ 2.8086
J5.6 2.8036
D/B:AVE_Ind_PA 2.7986
J6.2 2.7774
J5.2 2.7759
D2/B:AC_APP 2.7751
J7:AVE_FBP-CONT 2.7746
I7.5:OTHERS 2.7525
I7.1:LIFE_J 2.7430
D1/B:YC_VEH 2.7145
F4:B_OTHER-RP 2.6988
D2/A:AC_APP 2.6470
D/A:AVE_Ind_PA 2.6096
E3:A_POWER-SUP 2.5863
J6.1 2.5729
D2.5:B_E-FAN 2.5700
J5.4 2.5405
J7.3 2.5182
J5.7 2.4582
G1:B_SSS 2.4189
I8.1:BAD_W 2.3854
F1:B_HOUSE-OWN 2.3597
J6:AVE_FBP-IMPT 2.3348
I8.2:FISH_R 2.3142
I1:FD_Y 2.2985
I7.4:CP 2.2456
J6.4 2.2419
J6:AVE_FBP-PERC 2.2241
F4:A_OTHER-RP 2.2120
I7.3:F_LIGHT 2.1926
B3:AGE 2.1627
D2.5:A_E-FAN 2.1615
A: SES_INDEX 2.1443
J6.3 2.1325
I6.3:PPN 2.1288
J1:BOAT_AGREE 2.1277
G1:A_SSS 2.1230
G/B:Ind_INSU 2.1203
E1:B_DRINK-H2O 2.1044
I6.4:H&L 2.0855
E3:B_POWER-SUP 2.0800
J6.5 2.0462
D2.5-A_QTY 2.0303
F/A:Ind_REALP 2.0281
J5.1 1.9694
I6.1:GN 1.9538
B: SES_INDEX 1.9419
G3:B_PhilHealth 1.9105
E1:A_DRINK-H2O 1.8638
E2:B_DOMESTIC-H2O 1.8595
F/B:Ind_REALP 1.8511
D1.1:B_BIKE 1.8131
E/B:Ind_LIFECON 1.7700
E/A:Ind_LIFECON 1.7546
E2:A_DOMESTIC-H2O 1.5413
D1.1-B_QTY 1.4177
B6:M-STATUS 1.2784
In [146]:
# ENSEMBLE LEARNING FOR CLUSTERING

# In this process i will train 3 models
# - Entropy Weighted K-Means
# - K-Prototypes
# - Hierarchical Clustering

# and choose the best performing model for identifyin feature weights with the metrics of
# - Silhouette Score
# - Calinski-Harabasz Index
# - Davies-Bouldin Score
In [147]:
# Model configuration
K = 3
DF_CLEANED = DF.copy()
In [148]:
# Separate numeric and categorical
DF_NUM = DF_CLEANED.select_dtypes(include='number')
DF_CAT = DF_CLEANED.select_dtypes(include='object')
In [149]:
# Normalize numeric data
SCALER = MinMaxScaler()
X_NUM_SCALED = SCALER.fit_transform(DF_NUM)
In [150]:
# Label encode categorical data
LABEL_ENCODERS = {}
DF_CAT_ENCODED = DF_CAT.copy()
for COL in DF_CAT.columns:
    LE = LabelEncoder()
    DF_CAT_ENCODED[COL] = LE.fit_transform(DF_CAT[COL])
    LABEL_ENCODERS[COL] = LE

X_CAT_ENCODED = DF_CAT_ENCODED.values
In [151]:
# Combine numeric and categorical
X_MIXED = np.concatenate([X_NUM_SCALED, X_CAT_ENCODED], axis=1)
In [152]:
# Created a list that holds all the metrics
METRICS_SUMMARY_2 = []

def evaluate_model(X, LABELS, NAME):
    SILHOUETTE = silhouette_score(X, LABELS)
    CALINSKI = calinski_harabasz_score(X, LABELS)
    DAVIES = davies_bouldin_score(X, LABELS)
    
    METRICS_SUMMARY_2.append({
        'Model': NAME,
        'Silhouette': SILHOUETTE,
        'Calinski-Harabasz': CALINSKI,
        'Davies-Bouldin': DAVIES
    })
    
    RESULTS_DF = pd.DataFrame([{
        'Model': NAME,
        'Silhouette': f"{SILHOUETTE:.4f}",
        'Calinski-Harabasz': f"{CALINSKI:.4f}",
        'Davies-Bouldin': f"{DAVIES:.4f}"
    }]).set_index('Model')
    
    print(f"\n{NAME} Clustering Evaluation:")
    display(RESULTS_DF.style
           .set_properties(**{'text-align': 'center'})
           .format(precision=4))
In [153]:
# Entropy Weigthed K-Means
def entropy_weights(X):
    X_NORM = X / (X.sum(axis=0) + 1e-9)
    X_NORM = np.where(X_NORM == 0, 1e-9, X_NORM)
    ENTROPY = -np.sum(X_NORM * np.log(X_NORM), axis=0) / np.log(len(X))
    D = 1 - ENTROPY
    WEIGHTS = D / D.sum()
    return WEIGHTS

WEIGHTS = entropy_weights(X_NUM_SCALED)
X_WEIGHTED = X_NUM_SCALED * WEIGHTS

KMEANS_WEIGHTED = KMeans(n_clusters=K, random_state=42, n_init=10)
LABELS_WKMEANS = KMEANS_WEIGHTED.fit_predict(X_WEIGHTED)

evaluate_model(X_WEIGHTED, LABELS_WKMEANS, "Weighted KMeans")
Weighted KMeans Clustering Evaluation:
  Silhouette Calinski-Harabasz Davies-Bouldin
Model      
Weighted KMeans 0.1686 86.6478 3.0035
In [154]:
# K-Prototypes
KPROTO = KPrototypes(n_clusters=K, init='Cao', n_init=5, verbose=0)
LABELS_KPROTO = KPROTO.fit_predict(X_MIXED, categorical=list(range(X_NUM_SCALED.shape[1], X_MIXED.shape[1])))

evaluate_model(X_MIXED, LABELS_KPROTO, "K-Prototypes")
K-Prototypes Clustering Evaluation:
  Silhouette Calinski-Harabasz Davies-Bouldin
Model      
K-Prototypes 0.0127 36.5386 4.7558
In [155]:
# Hierarchical Clustering
HIERARCHICAL = AgglomerativeClustering(n_clusters=K, linkage='ward')
LABELS_HIER = HIERARCHICAL.fit_predict(X_NUM_SCALED)

evaluate_model(X_NUM_SCALED, LABELS_HIER, "Hierarchical Clustering")
Hierarchical Clustering Clustering Evaluation:
  Silhouette Calinski-Harabasz Davies-Bouldin
Model      
Hierarchical Clustering 0.0782 69.2062 3.0031
In [156]:
METRICS_DF2 = pd.DataFrame(METRICS_SUMMARY_2)
SCORING_DF2 = METRICS_DF2.copy()
SCORING_DF2['Inverse Davies-Bouldin'] = 1 / (SCORING_DF2['Davies-Bouldin'] + 1e-6)
SCALER = MinMaxScaler()
SCALED = SCALER.fit_transform(SCORING_DF2[['Silhouette', 'Calinski-Harabasz', 'Inverse Davies-Bouldin']])
SCORING_DF2['Composite Score'] = SCALED.mean(axis=1)
In [157]:
BEST_MODEL2 = SCORING_DF2.loc[SCORING_DF2['Composite Score'].idxmax()]

print("\nBest Performing Clustering Model:")
display(pd.DataFrame({
    '': ['Model', 'Composite Score'],
    'Best Performance': [BEST_MODEL2['Model'], f"{BEST_MODEL2['Composite Score']:.4f}"]
}).style.hide(axis='index'))
Best Performing Clustering Model:
Best Performance
Model Weighted KMeans
Composite Score 0.9999
In [158]:
FINAL_LABELS = None

if BEST_MODEL2['Model'] == "Weighted KMeans":
    FINAL_MODEL = KMeans(n_clusters=K, random_state=42, n_init=10)
    FINAL_LABELS = FINAL_MODEL.fit_predict(X_WEIGHTED)

elif BEST_MODEL2['Model'] == "K-Prototypes":
    FINAL_MODEL = KPrototypes(n_clusters=K, init='Cao', n_init=5, verbose=0)
    FINAL_LABELS = FINAL_MODEL.fit_predict(X_MIXED, categorical=list(range(X_NUM_SCALED.shape[1], X_MIXED.shape[1])))

elif BEST_MODEL2['Model'] == "Hierarchical Clustering":
    FINAL_MODEL = AgglomerativeClustering(n_clusters=K, linkage='ward')
    FINAL_LABELS = FINAL_MODEL.fit_predict(X_NUM_SCALED)

DF['final_cluster'] = FINAL_LABELS
C:\Users\User\AppData\Local\Temp\ipykernel_9152\1974701490.py:15: PerformanceWarning: DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
  DF['final_cluster'] = FINAL_LABELS
In [159]:
if BEST_MODEL2['Model'] in ["Weighted KMeans", "K-Prototypes", "Hierarchical Clustering"]:
    from sklearn.decomposition import PCA
    PCA_VIS = PCA(n_components=2)
    X_VIS = PCA_VIS.fit_transform(X_WEIGHTED if BEST_MODEL2['Model'] == "Weighted KMeans" else X_NUM_SCALED)

    plt.figure(figsize=(8, 6))
    sns.scatterplot(x=X_VIS[:, 0], y=X_VIS[:, 1], hue=FINAL_LABELS, palette="Set2", s=60)
    plt.title(f"Clusters by {BEST_MODEL2['Model']}")
    plt.xlabel("PCA 1")
    plt.ylabel("PCA 2")
    plt.legend(title="Cluster")
    plt.tight_layout()
    plt.show()
No description has been provided for this image
In [160]:
print("\nPer Feature Cluster Breakdown:\n")

for FEATURE in DF.columns:
    if FEATURE == 'final_cluster':
        continue

    print(f"--- Feature: {FEATURE} ---")

    if DF[FEATURE].dtype in ['int64', 'float64']:
        FEATURE_SUMMARY = DF.groupby('final_cluster')[FEATURE].agg(['mean', 'std', 'min', 'max', 'count'])
    else:
        FEATURE_SUMMARY = DF.groupby('final_cluster')[FEATURE].value_counts(normalize=True).unstack(fill_value=0)

    print(FEATURE_SUMMARY)
    print("\n")
Per Feature Cluster Breakdown:

--- Feature: RESPONSE ---
                     mean         std  min   max  count
final_cluster                                          
0              610.244648  348.353694   28  1319    327
1              436.244898  445.278964    1  1269     49
2              702.184839  389.759888    3  1339    963


--- Feature: A1:AREA ---
A1:AREA          abulug  alubijid    aparri   bolinao  bugasong    buguey  \
final_cluster                                                               
0              0.045872  0.018349  0.055046  0.045872  0.070336  0.076453   
1              0.387755  0.000000  0.040816  0.061224  0.061224  0.000000   
2              0.029076  0.033229  0.056075  0.019730  0.069574  0.057113   

A1:AREA         gitagum    gumaca    hamtic    itogon  ...    manito  \
final_cluster                                          ...             
0              0.027523  0.024465  0.061162  0.061162  ...  0.003058   
1              0.000000  0.020408  0.000000  0.020408  ...  0.000000   
2              0.019730  0.020768  0.067497  0.024922  ...  0.028037   

A1:AREA          morong   mulanay    pandan  san jose  san juan  sarangani  \
final_cluster                                                                
0              0.070336  0.021407  0.058104  0.030581  0.036697   0.003058   
1              0.061224  0.000000  0.040816  0.061224  0.020408   0.000000   
2              0.021807  0.045691  0.061267  0.065421  0.017653   0.053998   

A1:AREA        sta. ana   talisay   ternate  
final_cluster                                
0              0.058104  0.009174  0.036697  
1              0.061224  0.000000  0.040816  
2              0.040498  0.079958  0.031153  

[3 rows x 23 columns]


--- Feature: A2:GROUP ---
                   mean       std  min  max  count
final_cluster                                     
0              0.403670  0.491385    0    1    327
1              0.551020  0.502545    0    1     49
2              0.456906  0.498398    0    1    963


--- Feature: B3:AGE ---
                    mean        std  min  max  count
final_cluster                                       
0              48.691131  12.557329   19   79    327
1              49.693878  14.695810   20   75     49
2              48.350987  12.710407   18   87    963


--- Feature: B5:SEX ---
                   mean       std  min  max  count
final_cluster                                     
0              1.149847  0.357468    1    2    327
1              1.183673  0.391230    1    2     49
2              1.088266  0.283829    1    2    963


--- Feature: B6:M-STATUS ---
                   mean       std  min  max  count
final_cluster                                     
0              1.987768  0.631366    1    4    327
1              2.000000  0.677003    1    4     49
2              2.066459  0.647839    1    4    963


--- Feature: B7:EDUCATION ---
                   mean       std  min  max  count
final_cluster                                     
0              2.926606  0.872621    1    5    327
1              2.877551  0.753676    2    5     49
2              2.600208  0.699777    1    5    963


--- Feature: B8:HH_SIZE ---
                   mean       std  min  max  count
final_cluster                                     
0              1.966361  0.744365    1    4    327
1              1.897959  0.822722    1    4     49
2              1.936656  0.731768    1    4    963


--- Feature: C1:TOT_INCOME/A ---
                   mean       std  min  max  count
final_cluster                                     
0              1.247706  0.652827    1    6    327
1              1.367347  0.950743    1    5     49
2              1.130841  0.471114    1    6    963


--- Feature: C2:INCOME/B/FISH ---
                   mean       std  min  max  count
final_cluster                                     
0              1.266055  0.601259    1    6    327
1              1.448980  0.980247    1    6     49
2              1.226376  0.557178    1    6    963


--- Feature: C4:INCOME/B/ALT ---
                   mean       std  min  max  count
final_cluster                                     
0              1.269113  0.607514    1    6    327
1              1.469388  0.892143    1    5     49
2              1.125649  0.448834    1    5    963


--- Feature: C5:TOT_INCOME/B ---
                   mean       std  min  max  count
final_cluster                                     
0              2.510703  0.853920    2    6    327
1              2.857143  1.224745    2    6     49
2              2.345794  0.728004    2    6    963


--- Feature: D1.1:A_BIKE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.281346  0.450345    0    1    327
1              0.224490  0.421570    0    1     49
2              0.183801  0.387523    0    1    963


--- Feature: D1.1-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.281346  0.450345    0    1    327
1              0.224490  0.421570    0    1     49
2              0.184839  0.391036    0    2    963


--- Feature: D1.2:A_MOTORC ---
                   mean       std  min  max  count
final_cluster                                     
0              0.504587  0.500745    0    1    327
1              0.367347  0.487078    0    1     49
2              0.236760  0.425315    0    1    963


--- Feature: D1.2-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.504587  0.500745    0    1    327
1              0.367347  0.487078    0    1     49
2              0.236760  0.425315    0    1    963


--- Feature: D1.3:A_TRICYCLE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.198777  0.399691    0    1    327
1              0.142857  0.353553    0    1     49
2              0.026999  0.162165    0    1    963


--- Feature: D1.3-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.198777  0.399691    0    1    327
1              0.142857  0.353553    0    1     49
2              0.026999  0.162165    0    1    963


--- Feature: D1.4:A_CAR ---
                   mean       std  min  max  count
final_cluster                                     
0              0.024465  0.154724    0    1    327
1              0.102041  0.305839    0    1     49
2              0.000000  0.000000    0    0    963


--- Feature: D1.4-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.024465  0.154724    0    1    327
1              0.102041  0.305839    0    1     49
2              0.000000  0.000000    0    0    963


--- Feature: D1.5:A_JEEP ---
                   mean       std  min  max  count
final_cluster                                     
0              0.012232  0.110090    0    1    327
1              0.061224  0.242226    0    1     49
2              0.001038  0.032225    0    1    963


--- Feature: D1.5-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.012232  0.110090    0    1    327
1              0.081633  0.276642    0    1     49
2              0.001038  0.032225    0    1    963


--- Feature: D1.6:A_TRUCK ---
                   mean       std  min  max  count
final_cluster                                     
0              0.006116  0.078086    0    1    327
1              0.081633  0.276642    0    1     49
2              0.000000  0.000000    0    0    963


--- Feature: D1.6-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.006116  0.078086    0    1    327
1              0.102041  0.305839    0    1     49
2              0.000000  0.000000    0    0    963


--- Feature: D1.7:A_OTHERS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.009174  0.095488    0    1    327
1              0.224490  0.421570    0    1     49
2              0.013499  0.115460    0    1    963


--- Feature: D1.7-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.009174  0.095488    0    1    327
1              0.224490  0.421570    0    1     49
2              0.012461  0.110989    0    1    963


--- Feature: D1/A:YC_VEH ---
                    mean        std  min    max  count
final_cluster                                         
0              14.823853  12.902455  0.0   85.7    327
1              17.212245  23.146397  0.0  100.0     49
2               6.563448   8.905913  0.0   42.9    963


--- Feature: D1/A:AC_VEH ---
                   mean       std  min   max  count
final_cluster                                      
0              0.147584  0.129986  0.0  0.86    327
1              0.177551  0.241648  0.0  1.00     49
2              0.064901  0.088608  0.0  0.43    963


--- Feature: D1/A:Ind_VEH ---
                   mean        std  min     max  count
final_cluster                                         
0              3.849847   6.614723  0.0   73.47    327
1              8.454286  20.569155  0.0  100.00     49
2              1.215971   2.185411  0.0   18.37    963


--- Feature: D2.1:A_TV ---
                   mean       std  min  max  count
final_cluster                                     
0              0.782875  0.412921    0    1    327
1              0.551020  0.502545    0    1     49
2              0.512980  0.500091    0    1    963


--- Feature: D2.1-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.785933  0.410802    0    1    327
1              0.571429  0.500000    0    1     49
2              0.517134  0.502041    0    2    963


--- Feature: D2.2:A_DVD ---
                   mean       std  min  max  count
final_cluster                                     
0              0.195719  0.397361    0    1    327
1              0.571429  0.500000    0    1     49
2              0.083074  0.276137    0    1    963


--- Feature: D2.2-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.195719  0.397361    0    1    327
1              0.571429  0.500000    0    1     49
2              0.084112  0.281419    0    2    963


--- Feature: D2.3:A_WASH-M ---
                   mean       std  min  max  count
final_cluster                                     
0              0.703364  0.457475    0    1    327
1              0.489796  0.505076    0    1     49
2              0.145379  0.352666    0    1    963


--- Feature: D2.3-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.703364  0.457475    0    1    327
1              0.489796  0.505076    0    1     49
2              0.146417  0.356635    0    2    963


--- Feature: D2.4:A_AC ---
                   mean       std  min  max  count
final_cluster                                     
0              0.061162  0.239995    0    1    327
1              0.714286  0.456435    0    1     49
2              0.015576  0.123894    0    1    963


--- Feature: D2.4-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.073394  0.261183    0    1    327
1              0.714286  0.456435    0    1     49
2              0.019730  0.139143    0    1    963


--- Feature: D2.5:A_E-FAN ---
                   mean       std  min  max  count
final_cluster                                     
0              0.892966  0.309630    0    1    327
1              0.571429  0.500000    0    1     49
2              0.650052  0.477201    0    1    963


--- Feature: D2.5-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.899083  0.321374    0    2    327
1              0.571429  0.500000    0    1     49
2              0.653167  0.482713    0    2    963


--- Feature: D2.6:A_FRIDGE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.660550  0.474248    0    1    327
1              0.571429  0.500000    0    1     49
2              0.071651  0.258043    0    1    963


--- Feature: D2.6-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.660550  0.474248    0    1    327
1              0.571429  0.500000    0    1     49
2              0.071651  0.258043    0    1    963


--- Feature: D2.7:A_STOVE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.642202  0.480087    0    1    327
1              0.551020  0.502545    0    1     49
2              0.106957  0.309220    0    1    963


--- Feature: D2.7-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.642202  0.480087    0    1    327
1              0.551020  0.502545    0    1     49
2              0.106957  0.309220    0    1    963


--- Feature: D2.8:A_E-HEATER ---
                   mean       std  min  max  count
final_cluster                                     
0              0.195719  0.397361    0    1    327
1              0.469388  0.504234    0    1     49
2              0.021807  0.146128    0    1    963


--- Feature: D2.8-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.214067  0.410802    0    1    327
1              0.469388  0.504234    0    1     49
2              0.021807  0.146128    0    1    963


--- Feature: D2.9:A_FURNITURE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.379205  0.485933    0    1    327
1              0.346939  0.480929    0    1     49
2              0.028037  0.165165    0    1    963


--- Feature: D2.9-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.379205  0.485933    0    1    327
1              0.346939  0.480929    0    1     49
2              0.028037  0.165165    0    1    963


--- Feature: D2.10:A_OTHERS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.003058  0.055300    0    1    327
1              0.591837  0.496587    0    1     49
2              0.008307  0.090813    0    1    963


--- Feature: D2.10-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.024465  0.154724    0    1    327
1              0.591837  0.496587    0    1     49
2              0.006231  0.078728    0    1    963


--- Feature: D2/A:YC_APP ---
                    mean        std  min    max  count
final_cluster                                         
0              45.168196  15.952936  0.0   80.0    327
1              54.285714  28.795254  0.0  100.0     49
2              16.448598  11.465133  0.0   50.0    963


--- Feature: D2/A:AC_APP ---
                   mean       std  min  max  count
final_cluster                                     
0              0.457798  0.170326  0.0  1.0    327
1              0.544898  0.290159  0.0  1.0     49
2              0.165628  0.117370  0.0  0.8    963


--- Feature: D2/A:Ind_APP ---
                    mean        std  min    max  count
final_cluster                                         
0              23.321101  14.688513  0.0   70.0    327
1              37.755102  31.963215  0.0  100.0     49
2               4.053998   4.647132  0.0   32.0    963


--- Feature: D3.1:A_CP ---
                   mean       std  min  max  count
final_cluster                                     
0              0.856269  0.351354    0    1    327
1              0.489796  0.505076    0    1     49
2              0.636552  0.481242    0    1    963


--- Feature: D3.1-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.856269  0.351354    0    1    327
1              0.489796  0.505076    0    1     49
2              0.636552  0.481242    0    1    963


--- Feature: D3.2:A_LANDLINE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.012232  0.110090    0    1    327
1              0.122449  0.331201    0    1     49
2              0.001038  0.032225    0    1    963


--- Feature: D3.2-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.012232  0.110090    0    1    327
1              0.142857  0.408248    0    2     49
2              0.001038  0.032225    0    1    963


--- Feature: D3.3:A_COMPUTER ---
                   mean       std  min  max  count
final_cluster                                     
0              0.070336  0.256105    0    1    327
1              0.244898  0.434483    0    1     49
2              0.006231  0.078728    0    1    963


--- Feature: D3.3-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.067278  0.250887    0    1    327
1              0.265306  0.490557    0    2     49
2              0.006231  0.078728    0    1    963


--- Feature: D3.4:A_OTHERS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.015291  0.122894    0    1    327
1              0.489796  0.505076    0    1     49
2              0.007269  0.084992    0    1    963


--- Feature: D3.4-A_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.018349  0.134414    0    1    327
1              0.510204  0.544765    0    2     49
2              0.007269  0.084992    0    1    963


--- Feature: D3/A:YC_GAD ---
                    mean        std  min    max  count
final_cluster                                         
0              23.853211  11.936046  0.0   75.0    327
1              33.673469  24.233575  0.0  100.0     49
2              16.277259  12.402592  0.0   75.0    963


--- Feature: D3/A:AC_GAD ---
                   mean       std  min   max  count
final_cluster                                      
0              0.238532  0.120956  0.0  0.75    327
1              0.352041  0.301551  0.0  1.75     49
2              0.162773  0.124026  0.0  0.75    963


--- Feature: D3/A:IndGAD ---
                    mean        std  min     max  count
final_cluster                                          
0               7.110092   6.501896  0.0   56.25    327
1              18.622449  30.605058  0.0  175.00     49
2               4.186137   3.807333  0.0   56.25    963


--- Feature: D/A:AVE_Ind_PA ---
                    mean        std  min   max  count
final_cluster                                        
0              27.946177   9.241908  0.0  55.5    327
1              35.061224  18.025073  6.7  69.3     49
2              13.024922   7.309862  0.0  44.5    963


--- Feature: E1:A_DRINK-H2O ---
                   mean       std  min  max  count
final_cluster                                     
0              2.883792  1.090371    1    4    327
1              2.693878  1.261680    1    4     49
2              2.679128  1.065354    1    4    963


--- Feature: E2:A_DOMESTIC-H2O ---
                   mean       std  min  max  count
final_cluster                                     
0              2.617737  1.302684    1    4    327
1              2.306122  1.261680    1    4     49
2              2.586708  1.255967    1    4    963


--- Feature: E3:A_POWER-SUP ---
                   mean       std  min  max  count
final_cluster                                     
0              2.633028  0.717792    0    3    327
1              2.693878  0.741734    0    3     49
2              2.329180  1.006553    0    3    963


--- Feature: E4:A_COOK-FUEL ---
                   mean       std  min  max  count
final_cluster                                     
0              2.740061  0.898030    1    4    327
1              2.836735  0.943110    2    4     49
2              2.191070  0.687612    1    4    963


--- Feature: E5:A_NET-SUBS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.140673  0.348217    0    1    327
1              0.346939  0.480929    0    1     49
2              0.043614  0.204340    0    1    963


--- Feature: E/A:Ind_LIFECON ---
                    mean        std   min    max  count
final_cluster                                          
0              61.621101  13.801021  33.3  100.0    327
1              64.079592  17.416898  38.3  100.0     49
2              54.565836  13.123626  15.0  100.0    963


--- Feature: F1:A_HOUSE-OWN ---
                   mean       std  min  max  count
final_cluster                                     
0              2.688073  0.683243    1    3    327
1              2.693878  0.683255    1    3     49
2              2.425753  0.832950    1    3    963


--- Feature: F2:A_HOUSE-ACQ ---
                   mean       std  min  max  count
final_cluster                                     
0              2.665138  1.229223  0.0  3.5    327
1              2.775510  1.270913  0.0  3.5     49
2              2.233645  1.418958  0.0  4.0    963


--- Feature: F3:A_HOUSE-BUILT ---
                   mean       std  min  max  count
final_cluster                                     
0              2.321101  0.771987  1.0  4.0    327
1              2.500000  0.866025  1.0  4.0     49
2              1.841121  0.903730  1.0  4.0    963


--- Feature: F4:A_OTHER-RP ---
                   mean       std  min  max  count
final_cluster                                     
0              2.605505  1.317194  0.0  4.0    327
1              2.775510  1.380901  0.0  4.0     49
2              2.287643  1.452943  0.0  4.0    963


--- Feature: F/A:Ind_REALP ---
                    mean        std   min    max  count
final_cluster                                          
0              72.232416  19.785450  14.6  100.0    327
1              75.253061  20.713050  20.8  100.0     49
2              61.974247  21.213947  14.6  100.0    963


--- Feature: G1:A_SSS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.339450  0.474248    0    1    327
1              0.244898  0.434483    0    1     49
2              0.168224  0.374260    0    1    963


--- Feature: G2:A_GSIS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.027523  0.163852    0    1    327
1              0.326531  0.473804    0    1     49
2              0.018692  0.135504    0    1    963


--- Feature: G3:A_PhilHealth ---
                   mean       std  min  max  count
final_cluster                                     
0              0.519878  0.500370    0    1    327
1              0.387755  0.492287    0    1     49
2              0.473520  0.499558    0    1    963


--- Feature: G4:A_PN-IN ---
                   mean       std  min  max  count
final_cluster                                     
0              0.067278  0.250887    0    1    327
1              0.102041  0.305839    0    1     49
2              0.051921  0.221983    0    1    963


--- Feature: G5:A_LIFE-IN ---
                   mean       std  min  max  count
final_cluster                                     
0              0.076453  0.266128    0    1    327
1              0.102041  0.305839    0    1     49
2              0.036345  0.187244    0    1    963


--- Feature: G6:A_HEALTH-IN ---
                   mean       std  min  max  count
final_cluster                                     
0              0.082569  0.275651    0    1    327
1              0.102041  0.305839    0    1     49
2              0.023884  0.152766    0    1    963


--- Feature: G/A:Ind_INSU ---
                    mean        std  min    max  count
final_cluster                                         
0              22.262997  22.231726  0.0  120.0    327
1              25.306122  27.010958  0.0  120.0     49
2              15.171340  16.896614  0.0  100.0    963


--- Feature: A: SES_INDEX ---
                    mean        std   min   max  count
final_cluster                                         
0              46.197554   9.209208   0.0  73.8    327
1              50.406122  13.180453  23.1  76.8     49
2              34.109761  11.368133   0.0  61.6    963


--- Feature: D1.1:B_BIKE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.256881  0.437583    0    1    327
1              0.408163  0.496587    0    1     49
2              0.222222  0.415956    0    1    963


--- Feature: D1.1-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.256881  0.437583    0    1    327
1              0.408163  0.496587    0    1     49
2              0.222222  0.415956    0    1    963


--- Feature: D1.2:B_MOTORC ---
                   mean       std  min  max  count
final_cluster                                     
0              0.608563  0.488820    0    1    327
1              0.367347  0.487078    0    1     49
2              0.340602  0.474158    0    1    963


--- Feature: D1.2-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.608563  0.488820    0    1    327
1              0.367347  0.487078    0    1     49
2              0.340602  0.474158    0    1    963


--- Feature: D1.3:B_TRICYCLE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.211009  0.408650    0    1    327
1              0.102041  0.305839    0    1     49
2              0.044652  0.206646    0    1    963


--- Feature: D1.3-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.211009  0.408650    0    1    327
1              0.102041  0.305839    0    1     49
2              0.043614  0.204340    0    1    963


--- Feature: D1.4:B_CAR ---
                   mean       std  min  max  count
final_cluster                                     
0              0.021407  0.144958    0    1    327
1              0.122449  0.331201    0    1     49
2              0.001038  0.032225    0    1    963


--- Feature: D1.4-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.024465  0.154724    0    1    327
1              0.122449  0.331201    0    1     49
2              0.002077  0.045549    0    1    963


--- Feature: D1.5:B_JEEP ---
                   mean       std  min  max  count
final_cluster                                     
0              0.012232  0.110090    0    1    327
1              0.061224  0.242226    0    1     49
2              0.001038  0.032225    0    1    963


--- Feature: D1.5-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.015291  0.122894    0    1    327
1              0.061224  0.242226    0    1     49
2              0.002077  0.045549    0    1    963


--- Feature: D1.6:B_TRUCK ---
                   mean       std  min  max  count
final_cluster                                     
0              0.000000  0.000000    0    0    327
1              0.081633  0.276642    0    1     49
2              0.000000  0.000000    0    0    963


--- Feature: D1.6-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.006116  0.078086    0    1    327
1              0.081633  0.276642    0    1     49
2              0.001038  0.032225    0    1    963


--- Feature: D1.7:B_OTHERS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.006116  0.078086    0    1    327
1              0.448980  0.502545    0    1     49
2              0.021807  0.146128    0    1    963


--- Feature: D1.7-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.009174  0.095488    0    1    327
1              0.448980  0.502545    0    1     49
2              0.021807  0.146128    0    1    963


--- Feature: D1/B:YC_VEH ---
                    mean        std  min    max  count
final_cluster                                         
0              15.961468  11.464752  0.0   57.1    327
1              22.755102  21.212163  0.0  100.0     49
2               8.983904   9.898226  0.0   42.9    963


--- Feature: D1/B:AC_VEH ---
                   mean       std  min   max  count
final_cluster                                      
0              0.161162  0.121116  0.0  0.71    327
1              0.227347  0.212953  0.0  1.00     49
2              0.089346  0.101022  0.0  0.71    963


--- Feature: D1/B:Ind_VEH ---
                   mean        std  min     max  count
final_cluster                                         
0              3.899633   4.753102  0.0   32.65    327
1              9.578163  20.208343  0.0  100.00     49
2              1.792222   2.800811  0.0   20.41    963


--- Feature: D2.1:B_TV ---
                   mean       std  min  max  count
final_cluster                                     
0              0.828746  0.377308    0    1    327
1              0.591837  0.496587    0    1     49
2              0.611630  0.487633    0    1    963


--- Feature: D2.1-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.828746  0.377308    0    1    327
1              0.591837  0.496587    0    1     49
2              0.611630  0.487633    0    1    963


--- Feature: D2.2:B_DVD ---
                   mean       std  min  max  count
final_cluster                                     
0              0.192661  0.394993    0    1    327
1              0.489796  0.505076    0    1     49
2              0.082035  0.274561    0    1    963


--- Feature: D2.2-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.192661  0.394993    0    1    327
1              0.510204  0.505076    0    1     49
2              0.083074  0.276137    0    1    963


--- Feature: D2.3:B_WASH-M ---
                   mean       std  min  max  count
final_cluster                                     
0              0.767584  0.423020    0    1    327
1              0.387755  0.492287    0    1     49
2              0.241952  0.428488    0    1    963


--- Feature: D2.3-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.767584  0.423020    0    1    327
1              0.387755  0.492287    0    1     49
2              0.241952  0.428488    0    1    963


--- Feature: D2.4:B_AC ---
                   mean       std  min  max  count
final_cluster                                     
0              0.085627  0.280241    0    1    327
1              0.775510  0.421570    0    1     49
2              0.024922  0.155969    0    1    963


--- Feature: D2.4-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.085627  0.280241    0    1    327
1              0.775510  0.421570    0    1     49
2              0.024922  0.155969    0    1    963


--- Feature: D2.5:B_E-FAN ---
                   mean       std  min  max  count
final_cluster                                     
0              0.920489  0.270949    0    1    327
1              0.673469  0.473804    0    1     49
2              0.792316  0.405860    0    1    963


--- Feature: D2.5-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.935780  0.311593    0    3    327
1              0.693878  0.508432    0    2     49
2              0.802700  0.421009    0    2    963


--- Feature: D2.6:B_FRIDGE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.758410  0.428703    0    1    327
1              0.591837  0.496587    0    1     49
2              0.128764  0.335113    0    1    963


--- Feature: D2.6-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.758410  0.428703    0    1    327
1              0.591837  0.496587    0    1     49
2              0.128764  0.335113    0    1    963


--- Feature: D2.7:B_STOVE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.740061  0.439273    0    1    327
1              0.571429  0.500000    0    1     49
2              0.149533  0.356798    0    1    963


--- Feature: D2.7-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.740061  0.439273    0    1    327
1              0.571429  0.500000    0    1     49
2              0.149533  0.356798    0    1    963


--- Feature: D2.8:B_E-HEATER ---
                   mean       std  min  max  count
final_cluster                                     
0              0.232416  0.423020    0    1    327
1              0.469388  0.504234    0    1     49
2              0.035306  0.184649    0    1    963


--- Feature: D2.8-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.232416  0.423020    0    1    327
1              0.469388  0.504234    0    1     49
2              0.035306  0.184649    0    1    963


--- Feature: D2.9:B_FURNITURE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.452599  0.498511    0    1    327
1              0.346939  0.480929    0    1     49
2              0.039460  0.194788    0    1    963


--- Feature: D2.9-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.452599  0.498511    0    1    327
1              0.346939  0.480929    0    1     49
2              0.038422  0.192312    0    1    963


--- Feature: D2.10:B_OTHERS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.012232  0.110090    0    1    327
1              0.428571  0.500000    0    1     49
2              0.006231  0.078728    0    1    963


--- Feature: D2.10-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.012232  0.110090    0    1    327
1              0.428571  0.500000    0    1     49
2              0.006231  0.078728    0    1    963


--- Feature: D2/B:YC_APP ---
                    mean        std   min    max  count
final_cluster                                          
0              49.908257  13.532198  10.0   90.0    327
1              53.265306  25.689472  10.0  100.0     49
2              21.121495  11.854488   0.0   60.0    963


--- Feature: D2/B:AC_APP ---
                   mean       std  min  max  count
final_cluster                                     
0              0.500612  0.136340  0.1  1.0    327
1              0.536735  0.259562  0.1  1.0     49
2              0.212253  0.119229  0.0  0.6    963


--- Feature: D2/B:Ind_APP ---
                    mean        std  min    max  count
final_cluster                                         
0              26.813456  14.382550  1.0   90.0    327
1              35.102041  28.426253  1.0  100.0     49
2               5.888889   5.791519  0.0   36.0    963


--- Feature: D3.1:B_CP ---
                   mean       std  min  max  count
final_cluster                                     
0              0.935780  0.245520    0    1    327
1              0.530612  0.504234    0    1     49
2              0.778816  0.415260    0    1    963


--- Feature: D3.1-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.935780  0.245520    0    1    327
1              0.530612  0.504234    0    1     49
2              0.778816  0.415260    0    1    963


--- Feature: D3.2:B_LANDLINE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.018349  0.134414    0    1    327
1              0.224490  0.421570    0    1     49
2              0.002077  0.045549    0    1    963


--- Feature: D3.2-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.018349  0.134414    0    1    327
1              0.224490  0.468388    0    2     49
2              0.002077  0.045549    0    1    963


--- Feature: D3.3:B_COMPUTER ---
                   mean       std  min  max  count
final_cluster                                     
0              0.116208  0.320965    0    1    327
1              0.224490  0.421570    0    1     49
2              0.013499  0.115460    0    1    963


--- Feature: D3.3-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.113150  0.317261    0    1    327
1              0.244898  0.480044    0    2     49
2              0.013499  0.115460    0    1    963


--- Feature: D3.4:B_OTHERS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.012232  0.110090    0    1    327
1              0.285714  0.456435    0    1     49
2              0.005192  0.071906    0    1    963


--- Feature: D3.4-B_QTY ---
                   mean       std  min  max  count
final_cluster                                     
0              0.012232  0.110090    0    1    327
1              0.306122  0.508432    0    2     49
2              0.005192  0.071906    0    1    963


--- Feature: D3/B:YC_GAD ---
                    mean        std  min    max  count
final_cluster                                         
0              27.064220  11.482402  0.0   75.0    327
1              31.632653  24.882804  0.0  100.0     49
2              19.989616  11.119737  0.0   75.0    963


--- Feature: D3/B:AC_GAD ---
                   mean       std  min   max  count
final_cluster                                      
0              0.269878  0.114122  0.0  0.75    327
1              0.326531  0.307053  0.0  1.75     49
2              0.199896  0.111197  0.0  0.75    963


--- Feature: D3/B:IndGAD ---
                    mean        std  min     max  count
final_cluster                                          
0               8.600917   7.868731  0.0   56.25    327
1              17.346939  29.817842  0.0  175.00     49
2               5.231049   4.126755  0.0   56.25    963


--- Feature: D/B:AVE_Ind_PA ---
                    mean        std   min    max  count
final_cluster                                          
0              30.972477   7.478005  10.0  57.86    327
1              35.880000  16.543813   8.1  75.00     49
2              16.578006   7.145593   0.0  35.95    963


--- Feature: E1:B_DRINK-H2O ---
                   mean       std  min  max  count
final_cluster                                     
0              3.143731  1.065702    1    4    327
1              2.734694  1.237866    1    4     49
2              2.845275  1.083780    1    4    963


--- Feature: E2:B_DOMESTIC-H2O ---
                   mean       std  min  max  count
final_cluster                                     
0              2.764526  1.297554    1    4    327
1              2.224490  1.262691    1    4     49
2              2.647975  1.279806    1    4    963


--- Feature: E3:B_POWER-SUP ---
                   mean       std  min  max  count
final_cluster                                     
0              2.798165  0.642748    0    3    327
1              2.755102  0.778102    0    3     49
2              2.623053  0.828399    0    3    963


--- Feature: E4:B_COOK-FUEL ---
                   mean       std  min  max  count
final_cluster                                     
0              3.113150  0.947735    1    4    327
1              2.836735  0.920755    2    4     49
2              2.273105  0.816395    1    4    963


--- Feature: E5:B_NET-SUBS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.201835  0.401985    0    1    327
1              0.346939  0.480929    0    1     49
2              0.062305  0.241835    0    1    963


--- Feature: E/B:Ind_LIFECON ---
                    mean        std   min    max  count
final_cluster                                          
0              67.817125  15.185683  35.0  100.0    327
1              64.285714  17.816063  40.0  100.0     49
2              57.923364  13.237665  15.0  100.0    963


--- Feature: F1:B_HOUSE-OWN ---
                   mean       std  min  max  count
final_cluster                                     
0              2.675841  0.690956    1    3    327
1              2.775510  0.586846    1    3     49
2              2.442368  0.825498    1    3    963


--- Feature: F2:B_HOUSE-ACQ ---
                   mean       std  min  max  count
final_cluster                                     
0              2.654434  1.237144  0.0  3.5    327
1              2.795918  1.249660  0.0  3.5     49
2              2.251817  1.404139  0.0  4.0    963


--- Feature: F3:B_HOUSE-BUILT ---
                   mean       std  min  max  count
final_cluster                                     
0              2.325688  0.786317  1.0  4.0    327
1              2.408163  0.833376  1.0  4.0     49
2              1.861371  0.904047  1.0  4.0    963


--- Feature: F4:B_OTHER-RP ---
                   mean       std  min  max  count
final_cluster                                     
0              2.633028  1.319931  0.0  4.0    327
1              2.775510  1.380901  0.0  4.0     49
2              2.298027  1.446182  0.0  4.0    963


--- Feature: F/B:Ind_REALP ---
                    mean        std   min     max  count
final_cluster                                           
0              72.253425  19.603687  14.6  100.00    327
1              75.504082  20.555869  20.8  100.00     49
2              62.363541  21.022201  14.6  105.56    963


--- Feature: G1:B_SSS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.333333  0.472127    0    1    327
1              0.224490  0.421570    0    1     49
2              0.145379  0.352666    0    1    963


--- Feature: G2:B_GSIS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.033639  0.180575    0    1    327
1              0.367347  0.487078    0    1     49
2              0.021807  0.146128    0    1    963


--- Feature: G3:B_PhilHealth ---
                   mean       std  min  max  count
final_cluster                                     
0              0.547401  0.498511    0    1    327
1              0.408163  0.496587    0    1     49
2              0.495327  0.500238    0    1    963


--- Feature: G4:B_PN-IN ---
                   mean       std  min  max  count
final_cluster                                     
0              0.088685  0.284724    0    1    327
1              0.142857  0.353553    0    1     49
2              0.056075  0.230186    0    1    963


--- Feature: G5:B_LIFE-IN ---
                   mean       std  min  max  count
final_cluster                                     
0              0.094801  0.293389    0    1    327
1              0.102041  0.305839    0    1     49
2              0.049844  0.217736    0    1    963


--- Feature: G6:B_HEALTH-IN ---
                   mean       std  min  max  count
final_cluster                                     
0              0.097859  0.297580    0    1    327
1              0.142857  0.353553    0    1     49
2              0.032191  0.176599    0    1    963


--- Feature: G/B:Ind_INSU ---
                    mean        std  min    max  count
final_cluster                                         
0              23.914373  23.168468  0.0  100.0    327
1              27.755102  28.523770  0.0  120.0     49
2              15.638629  17.075174  0.0  120.0    963


--- Feature: B: SES_INDEX ---
                    mean        std   min   max  count
final_cluster                                         
0              48.987156   9.521581   0.0  74.8    327
1              51.259184  12.708313  22.0  82.2     49
2              36.216511  11.715779   0.0  67.7    963


--- Feature: H1:4Ps ---
                   mean       std  min  max  count
final_cluster                                     
0              0.168196  0.382714  0.0  2.0    327
1              0.091837  0.282722  0.0  1.0     49
2              0.359813  0.576817  0.0  2.0    963


--- Feature: H2:RET_P ---
                   mean       std  min  max  count
final_cluster                                     
0              0.097859  0.302690  0.0  2.0    327
1              0.132653  0.335030  0.0  1.0     49
2              0.161994  0.492384  0.0  2.0    963


--- Feature: H3:SPES ---
                   mean       std  min  max  count
final_cluster                                     
0              0.056575  0.239525  0.0  2.0    327
1              0.051020  0.210280  0.0  1.0     49
2              0.137591  0.474417  0.0  2.0    963


--- Feature: H4:AL_P ---
                   mean       std  min  max  count
final_cluster                                     
0              0.146789  0.358739  0.0  2.0    327
1              0.132653  0.335030  0.0  1.0     49
2              0.188474  0.509313  0.0  2.0    963


--- Feature: H5:TBE ---
                   mean       std  min  max  count
final_cluster                                     
0              0.120795  0.332210  0.0  2.0    327
1              0.132653  0.335030  0.0  1.0     49
2              0.167705  0.496522  0.0  2.0    963


--- Feature: H6:F_PC ---
                   mean       std  min  max  count
final_cluster                                     
0              0.096330  0.299363  0.0  2.0    327
1              0.091837  0.263658  0.0  1.0     49
2              0.210280  0.523033  0.0  2.0    963


--- Feature: H7:AS_P ---
                   mean       std  min  max  count
final_cluster                                     
0              0.136086  0.348943  0.0  2.0    327
1              0.275510  0.445594  0.0  1.0     49
2              0.174455  0.500199  0.0  2.0    963


--- Feature: H8:E/CW_P ---
                   mean       std  min  max  count
final_cluster                                     
0              0.244648  0.434085  0.0  2.0    327
1              0.102041  0.269637  0.0  1.0     49
2              0.274143  0.550587  0.0  2.0    963


--- Feature: H:Ind_ASWS ---
                    mean        std  min    max  count
final_cluster                                         
0              12.730887  16.214221  0.0  100.0    327
1              12.634694  13.598170  0.0   56.3     49
2              10.171028  13.876757  0.0  100.0    963


--- Feature: I1:FD_Y ---
                   mean       std  min  max  count
final_cluster                                     
0              3.428135  0.940012    1    4    327
1              2.775510  1.104259    1    4     49
2              3.595016  0.757537    1    4    963


--- Feature: I2:A/C_M ---
                   mean       std  min  max  count
final_cluster                                     
0              1.657492  0.475276    1    2    327
1              1.510204  0.505076    1    2     49
2              1.595016  0.491144    1    2    963


--- Feature: I3:NOP_H ---
                   mean       std  min  max  count
final_cluster                                     
0              1.140673  1.272080    0    7    327
1              0.816327  1.093036    0    5     49
2              0.987539  1.092333    0   10    963


--- Feature: I4:TFA ---
                   mean       std  min  max  count
final_cluster                                     
0              1.174312  0.432797    1    3    327
1              1.571429  0.866025    1    3     49
2              1.177570  0.442817    1    3    963


--- Feature: I5:TFV ---
                   mean       std  min  max  count
final_cluster                                     
0              2.905199  0.956189    1    5    327
1              2.204082  1.098855    1    4     49
2              2.884735  0.905174    1    5    963


--- Feature: I6.1:GN ---
                   mean       std  min  max  count
final_cluster                                     
0              0.577982  0.494638    0    1    327
1              0.387755  0.492287    0    1     49
2              0.504673  0.500238    0    1    963


--- Feature: I6.2:FT ---
                   mean       std  min  max  count
final_cluster                                     
0              0.162080  0.369089    0    1    327
1              0.081633  0.276642    0    1     49
2              0.095535  0.294105    0    1    963


--- Feature: I6.3:PPN ---
                   mean       std  min  max  count
final_cluster                                     
0              0.113150  0.317261    0    1    327
1              0.183673  0.391230    0    1     49
2              0.110073  0.313143    0    1    963


--- Feature: I6.4:H&L ---
                   mean       std  min  max  count
final_cluster                                     
0              0.660550  0.474248    0    1    327
1              0.387755  0.492287    0    1     49
2              0.603323  0.489462    0    1    963


--- Feature: I6.5:OTHERS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.116208  0.320965    0    1    327
1              0.224490  0.421570    0    1     49
2              0.104881  0.306559    0    1    963


--- Feature: I7.1:LIFE_J ---
                   mean       std  min  max  count
final_cluster                                     
0              0.357798  0.480087    0    1    327
1              0.285714  0.456435    0    1     49
2              0.283489  0.450926    0    1    963


--- Feature: I7.2:LIFE_B ---
                   mean       std  min  max  count
final_cluster                                     
0              0.211009  0.408650    0    1    327
1              0.367347  0.487078    0    1     49
2              0.112150  0.315714    0    1    963


--- Feature: I7.3:F_LIGHT ---
                   mean       std  min  max  count
final_cluster                                     
0              0.804281  0.397361    0    1    327
1              0.551020  0.502545    0    1     49
2              0.861890  0.345195    0    1    963


--- Feature: I7.4:CP ---
                   mean       std  min  max  count
final_cluster                                     
0              0.409786  0.492548    0    1    327
1              0.306122  0.465657    0    1     49
2              0.296989  0.457169    0    1    963


--- Feature: I7.5:OTHERS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.070336  0.256105    0    1    327
1              0.387755  0.492287    0    1     49
2              0.049844  0.217736    0    1    963


--- Feature: I8.1:BAD_W ---
                   mean       std  min  max  count
final_cluster                                     
0              0.932722  0.250887    0    1    327
1              0.571429  0.500000    0    1     49
2              0.900312  0.299740    0    1    963


--- Feature: I8.2:FISH_R ---
                   mean       std  min  max  count
final_cluster                                     
0              0.226300  0.419077    0    1    327
1              0.285714  0.456435    0    1     49
2              0.211838  0.408823    0    1    963


--- Feature: I8.3:BOAT_P ---
                   mean       std  min  max  count
final_cluster                                     
0              0.529052  0.499920    0    1    327
1              0.306122  0.465657    0    1     49
2              0.590862  0.491930    0    1    963


--- Feature: I8.4:FISH_COMP ---
                   mean       std  min  max  count
final_cluster                                     
0              0.330275  0.471033    0    1    327
1              0.204082  0.407206    0    1     49
2              0.313603  0.464198    0    1    963


--- Feature: I8.5:OTHERS ---
                   mean       std  min  max  count
final_cluster                                     
0              0.070336  0.256105    0    1    327
1              0.204082  0.407206    0    1     49
2              0.063344  0.243707    0    1    963


--- Feature: Y_BOAT-RE ---
                      mean       std     min     max  count
final_cluster                                              
0              2017.269113  0.825807  2015.0  2023.0    327
1              2017.040816  0.351140  2016.0  2018.0     49
2              2017.251298  0.841483  2014.0  2023.0    963


--- Feature: NY_W/BOAT ---
                   mean       std  min   max  count
final_cluster                                      
0              6.730887  0.825807  1.0   9.0    327
1              6.959184  0.351140  6.0   8.0     49
2              6.748702  0.841483  1.0  10.0    963


--- Feature: BOAT_COND ---
                   mean       std  min  max  count
final_cluster                                     
0              1.880734  0.324598  1.0  2.0    327
1              1.653061  0.480929  1.0  2.0     49
2              1.865005  0.341896  1.0  2.0    963


--- Feature: J1:BOAT_AGREE ---
                   mean       std  min  max  count
final_cluster                                     
0              1.877676  0.328162  1.0  2.0    327
1              1.897959  0.305839  1.0  2.0     49
2              1.881620  0.323225  1.0  2.0    963


--- Feature: J2:BOAT_TYPE ---
                   mean       std  min  max  count
final_cluster                                     
0              1.018349  0.134414  1.0  2.0    327
1              1.020408  0.142857  1.0  2.0     49
2              1.034268  0.182011  1.0  2.0    963


--- Feature: J3:BOAT_DESIGN ---
                   mean       std  min  max  count
final_cluster                                     
0              3.871560  0.670604  1.0  6.0    327
1              3.897959  0.510102  1.0  4.0     49
2              3.825545  0.748312  1.0  6.0    963


--- Feature: J4:BOAT_COND ---
                   mean       std  min  max  count
final_cluster                                     
0              1.180428  0.385133  1.0  2.0    327
1              1.326531  0.473804  1.0  2.0     49
2              1.201454  0.401295  1.0  2.0    963


--- Feature: J4: REASON-NO ---
                   mean       std  min  max  count
final_cluster                                     
0              0.412844  0.970842  0.0  4.0    327
1              0.612245  1.095756  0.0  4.0     49
2              0.440291  0.979688  0.0  4.0    963


--- Feature: J5.1 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.675841  0.882062  1.0  5.0    327
1              4.857143  0.408248  3.0  5.0     49
2              4.656282  0.882511  1.0  5.0    963


--- Feature: J5.2 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.718654  0.730821  1.0  5.0    327
1              4.714286  0.763763  1.0  5.0     49
2              4.759086  0.640059  1.0  5.0    963


--- Feature: J5.3 ---
                   mean       std  min  max  count
final_cluster                                     
0              2.076453  0.823521  1.0  5.0    327
1              2.163265  1.086795  1.0  5.0     49
2              2.018692  0.860102  1.0  5.0    963


--- Feature: J5.4 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.785933  0.629089  1.0  5.0    327
1              4.775510  0.770965  1.0  5.0     49
2              4.818276  0.561404  1.0  5.0    963


--- Feature: J5.5 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.740061  0.719801  1.0  5.0    327
1              4.816327  0.666879  1.0  5.0     49
2              4.773624  0.664398  1.0  5.0    963


--- Feature: J5.6 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.507645  1.148126  1.0  5.0    327
1              4.408163  1.223355  1.0  5.0     49
2              4.356179  1.298020  1.0  5.0    963


--- Feature: J5.7 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.767584  0.669989  1.0  5.0    327
1              4.632653  0.928571  1.0  5.0     49
2              4.811007  0.607989  1.0  5.0    963


--- Feature: J6:AVE_FBP-IMPT ---
                   mean       std   min  max  count
final_cluster                                      
0              3.984037  0.389823  2.00  5.0    327
1              4.081633  0.394902  2.29  5.0     49
2              4.003292  0.359941  2.00  5.0    963


--- Feature: J6.1 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.737003  0.690386  1.0  5.0    327
1              4.632653  0.882560  1.0  5.0     49
2              4.784008  0.564981  1.0  5.0    963


--- Feature: J6.2 ---
                   mean       std  min  max  count
final_cluster                                     
0              3.993884  0.678296  1.0  5.0    327
1              3.775510  1.085229  1.0  5.0     49
2              4.121495  0.675365  1.0  5.0    963


--- Feature: J6.3 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.773700  0.589410  1.0  5.0    327
1              4.734694  0.531331  3.0  5.0     49
2              4.788162  0.553561  1.0  5.0    963


--- Feature: J6.4 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.767584  0.602491  1.0  5.0    327
1              4.714286  0.763763  2.0  5.0     49
2              4.802700  0.558913  1.0  5.0    963


--- Feature: J6.5 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.767584  0.597378  1.0  5.0    327
1              4.816327  0.486204  3.0  5.0     49
2              4.800623  0.531463  1.0  5.0    963


--- Feature: J6.6 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.721713  0.712887  1.0  5.0    327
1              4.836735  0.472005  3.0  5.0     49
2              4.777778  0.601837  1.0  5.0    963


--- Feature: J6:AVE_FBP-PERC ---
                   mean       std   min  max  count
final_cluster                                      
0              4.529541  0.477845  1.00  5.0    327
1              4.511633  0.494913  2.67  5.0     49
2              4.590312  0.440021  1.00  5.0    963


--- Feature: J7.1 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.532110  1.115167  1.0  5.0    327
1              4.387755  1.304101  1.0  5.0     49
2              4.390447  1.265729  1.0  5.0    963


--- Feature: J7.2 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.648318  0.811264  1.0  5.0    327
1              4.632653  0.858630  1.0  5.0     49
2              4.715472  0.694528  1.0  5.0    963


--- Feature: J7.3 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.703364  0.683339  1.0  5.0    327
1              4.653061  0.778648  1.0  5.0     49
2              4.718588  0.675328  1.0  5.0    963


--- Feature: J7.4 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.770642  0.595994  1.0  5.0    327
1              4.673469  0.774267  1.0  5.0     49
2              4.767394  0.601364  1.0  5.0    963


--- Feature: J7.5 ---
                   mean       std  min  max  count
final_cluster                                     
0              4.770642  0.758991  1.0  5.0    327
1              4.673469  1.028505  1.0  5.0     49
2              4.663551  0.996350  1.0  5.0    963


--- Feature: J7:AVE_FBP-CONT ---
                   mean       std  min  max  count
final_cluster                                     
0              4.207951  0.411861  2.6  5.0    327
1              4.244898  0.497100  2.4  5.0     49
2              4.216615  0.481567  1.0  5.0    963


In [161]:
sns.set(style="whitegrid")

PALETTE = sns.color_palette("husl", len(DF['final_cluster'].unique()))

for FEATURE in DF.columns:
    if FEATURE == 'final_cluster':
        continue

    plt.figure(figsize=(8, 4))
    
    JITTER = np.random.normal(0, 0.03, size=len(DF))

    sns.scatterplot(
        x=DF[FEATURE],
        y=JITTER,
        hue=DF['final_cluster'],
        palette=PALETTE,
        marker='o',
        edgecolor='w',
        s=70
    )

    plt.title(f'Cluster Distribution by Feature: {FEATURE}')
    plt.xlabel(FEATURE)
    plt.ylabel('Jitter (for visualization only)')
    plt.legend(title='Cluster')
    plt.tight_layout()
    plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image